sn10x, Colon

(only 1/2 datasize to standard level)

Baf53cre ENS neurons,
CR infection 7d, CTL x4 CKO(Ahr-cko) x4

loading 60k cells for all,
cellranger called 24,758 cells

pure neurons downstream

load dependancies

load obj

GEX.seur <- readRDS("./GEX0925.seur.preAnno.rds")
GEX.seur
## An object of class Seurat 
## 20978 features across 13055 samples within 1 assay 
## Active assay: RNA (20978 features, 1112 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,16,
                                               2,7,12,17
                                               )]

color.cnt <- color.FB[c(1,5)]

color.pre <-c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
              "#BF7E6B","#D46B35","#F19258","#FF8080",
              "#BDAE8D","#BD66C4","#C03778",
              "#97BA59","#DFAB16","#2BA956","#9FE727"#,
              #"red"
              )
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno", cols = c(color.pre,"red") ) +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

filtering

GEX.seur <- subset(GEX.seur,subset = preAnno != "Mix" & DoubletFinder0.1 == "Singlet")
GEX.seur
## An object of class Seurat 
## 20978 features across 11666 samples within 1 assay 
## Active assay: RNA (20978 features, 1112 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

markers.new.s <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Cntnap5b",
                       "Gabrb1","Nxph1","Lama2","Lrrc7",
                       "Ryr3","Eda","Tac1",
                       "Kctd8","Ntrk2","Penk",
                       "Fut9","Nfatc1","Egfr","Mgll",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2","Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Sst","Adamts9","Kcnn2",
                      "Calb2"),
                 IPAN=c("Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9","Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 CONTAM=c("Actl6b","Phox2b"))

pm.All_new.s <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="All")
pm.All_new.s

pm.CTL_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CTL")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CTL only")
pm.CTL_new.s

pm.CKO_new.s <- DotPlot(subset(GEX.seur,subset=cnt %in% c("CKO")), features = as.vector(unlist(markers.new.s)), group.by = "preAnno",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="CKO only")
pm.CKO_new.s

VlnPlot(subset(GEX.seur, subset = cnt %in% c("CTL")), 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 2, pt.size = 0.01, group.by = "preAnno")

VlnPlot(subset(GEX.seur, subset = cnt %in% c("CKO")), 
        features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 2, pt.size = 0.01, group.by = "preAnno")

re-clustering

GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)

# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)

# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2

head(VariableFeatures(GEX.seur), 200)
##   [1] "Cntnap2"       "Mgat4c"        "Zfp804a"       "Cntn5"        
##   [5] "Sgcz"          "Robo2"         "Nmu"           "Klhl1"        
##   [9] "Lingo2"        "Gm32647"       "Cdh8"          "Kcnb2"        
##  [13] "Cpne4"         "Sst"           "Gm30613"       "Pcdh9"        
##  [17] "4930432L08Rik" "Kctd16"        "Grm7"          "Gm21847"      
##  [21] "Adgrg6"        "Ntng1"         "Csmd1"         "Rbfox1"       
##  [25] "Gm29521"       "Brinp3"        "Car10"         "Nrxn3"        
##  [29] "Galntl6"       "Sema3e"        "Mmrn1"         "Asic2"        
##  [33] "Dgkg"          "Gm26612"       "Tmeff2"        "Gm15680"      
##  [37] "Cdh9"          "Unc5d"         "Ano5"          "Chsy3"        
##  [41] "Gpr149"        "Prkg1"         "Astn2"         "Pcdh11x"      
##  [45] "Kcnq5"         "Gm12296"       "Ano2"          "Kirrel3"      
##  [49] "Ebf1"          "Schip1"        "Cdh18"         "Gm15261"      
##  [53] "Gpc6"          "Cenpf"         "Grik1"         "Lrp1b"        
##  [57] "Ptprt"         "M1ap"          "Trps1"         "B230323A14Rik"
##  [61] "L3mbtl4"       "Dgki"          "Gpc5"          "Fut9"         
##  [65] "Tafa1"         "Spock3"        "D030068K23Rik" "Xirp2"        
##  [69] "Grm8"          "Opcml"         "Kcnh7"         "Tafa2"        
##  [73] "Plxna4"        "Nxph1"         "Pde7b"         "1700111E14Rik"
##  [77] "March1"        "Nxph2"         "mt-Co3"        "Pde4d"        
##  [81] "Nrg1"          "St8sia2"       "Col25a1"       "Itgb6"        
##  [85] "Zfp804b"       "Dcc"           "Pbx3"          "Kctd8"        
##  [89] "Plcl1"         "Avil"          "Gm49127"       "6330411D24Rik"
##  [93] "Prox1"         "A330008L17Rik" "Tac1"          "Gm30094"      
##  [97] "Zfhx3"         "Chrm3"         "Alk"           "Inpp4b"       
## [101] "Pdzrn4"        "4930509J09Rik" "Vip"           "mt-Atp6"      
## [105] "Gm28153"       "Casp8"         "Kcnmb2"        "Cacna2d3"     
## [109] "Cpa6"          "Rfx6"          "Dgkb"          "1700063D05Rik"
## [113] "Nog"           "Ntrk3"         "Ccbe1"         "Dlc1"         
## [117] "Egflam"        "Pcdh10"        "Pde4b"         "Cysltr2"      
## [121] "1700034P13Rik" "Shisa6"        "Lgals9"        "Gna14"        
## [125] "AC150683.1"    "Cmah"          "Wdr17"         "Robo1"        
## [129] "Dlgap1"        "Dock1"         "Clstn2"        "Cdh6"         
## [133] "Grin3a"        "Gm20754"       "Kif26b"        "Csmd3"        
## [137] "Gm13376"       "Bloc1s6os"     "Gimap6"        "Gm5737"       
## [141] "Gabra1"        "Spag5"         "2900045O20Rik" "Ccdc68"       
## [145] "Eya1"          "9530026P05Rik" "Gm20757"       "Myl1"         
## [149] "Serpinb8"      "Olfr889"       "Adarb2"        "Dock10"       
## [153] "Unc5c"         "Piezo2"        "Abca9"         "Tenm2"        
## [157] "Erbb4"         "Fhit"          "Bmpr1b"        "Gm16685"      
## [161] "Rab27b"        "Vgll3"         "Stac"          "Fstl4"        
## [165] "Mast4"         "Skap1"         "Ssc4d"         "Hcn1"         
## [169] "Galnt18"       "Calcb"         "A530053G22Rik" "mt-Co2"       
## [173] "Gm28376"       "Gm4128"        "Cntn4"         "Gm15584"      
## [177] "Dab1"          "mt-Co1"        "Ror1"          "5530401A14Rik"
## [181] "Pdyn"          "Tacr1"         "Trp53cor1"     "2610307P16Rik"
## [185] "Gm46329"       "Il1rapl1"      "Slc4a4"        "9330185C12Rik"
## [189] "Sema5a"        "Serpini2"      "Cyp2j13"       "Smpdl3b"      
## [193] "Gm15866"       "Fendrr"        "Col22a1"       "Gm49418"      
## [197] "Meltf"         "4930470H14Rik" "1700082M22Rik" "Gm5086"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix

PCA

# exclude MT genes  and more 

#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)

DIG <- grep("^mt-|^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AI|^AW|^BC|^Gm|^Hist|Rik$|-ps",
            rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))


VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG, 
                                                  CC_gene) )

GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1 
## Positive:  Nos1, Cmah, Dgkb, Epha5, Rgs6, Alcam, Cacnb2, Slc44a5, Thsd7a, Hmcn1 
##     Thsd7b, Gfra1, Lrrc4c, Syn3, Etv1, Auts2, Cadps2, Dpp10, Timp3, Kcnip4 
##     Rarb, Creb5, Vwa5b1, Stxbp6, Col25a1, Rgs7, Kcnj3, Unc13c, Egfem1, Synpo2 
## Negative:  Rbfox1, Tafa1, Gpc6, Pde4b, Alk, Bnc2, Ptprt, Tafa2, Unc5c, Grik1 
##     Stxbp5l, Plxna4, Pbx1, Pld5, Unc5d, Cpne8, Pcdh7, Plcl1, Lingo2, Lrp1b 
##     Cacna1e, Kalrn, Grm7, Chsy3, Nell1, Dlgap2, Casz1, Cdh18, Chat, Syn2 
## PC_ 2 
## Positive:  Mgat4c, Sgcz, Tmeff2, Auts2, Cdh8, Robo2, Cntn5, Zfp804a, Schip1, Kcnq5 
##     Pbx3, Nmu, Zfhx3, Ano2, Dgkg, Plcl1, Kcnb2, Ano5, Myl1, Ntng1 
##     Brinp3, Astn2, Kif26b, Gpr149, Slc4a4, Cpne4, Dgkb, Cdh6, Spock3, Lingo2 
## Negative:  Grik1, Galntl6, Tshz2, Bnc2, Ptprt, Pcdh7, Dlgap2, Cdc14a, Tox, Nkain2 
##     Plcxd3, Arhgap24, St6galnac3, Tmem132c, Pde4b, Fhit, Oprk1, Alk, Brinp2, Caln1 
##     Gfra2, Satb1, Lrp1b, Ptprd, Sdk2, Colec12, Adamtsl1, Slc26a4, Pantr1, Galnt17 
## PC_ 3 
## Positive:  Tenm2, Dlgap1, Kctd8, Dock10, Unc5d, Schip1, Pde4d, Kcnip4, Grm7, Kctd16 
##     Dmd, Ndst4, Kcnd2, Lrp1b, Nhs, Tac1, L3mbtl4, Stac, Skap1, Fut9 
##     Plcb1, Egfem1, Pde1c, Pde7b, Kirrel3, Col27a1, Egfr, Epha5, Csmd3, Sema3e 
## Negative:  Cpne4, Cdh8, Mgat4c, Ccbe1, Chsy3, Nrxn3, Tshz2, Nmu, Ntrk3, Ptprt 
##     Ano2, Plxna4, Tcf7l2, B3galt1, Dgkg, Gpr149, Kcnb2, Cux2, Nfia, Dgki 
##     Itgb6, Slc2a13, Galnt17, Calcb, Robo2, Astn2, Clstn2, Scube1, Prune2, Pcdh9 
## PC_ 4 
## Positive:  Tafa2, Ntng1, Klhl1, Kcnh7, L3mbtl4, Flrt2, Pbx3, Trpm3, Skap1, Stac 
##     Zfhx3, Kctd16, Sema3e, Gna14, Nxph2, Pde7b, Pbx1, Tmtc2, Enox1, Csmd1 
##     Bmpr1b, Dlc1, Slc44a5, Sez6l, Kif26b, Tenm4, Ano5, Zbbx, Csmd2, Chrm3 
## Negative:  Car10, Cntn5, Nxph1, Pcdh9, Synpr, Csmd3, Cntnap2, Zfp804b, Cdh8, Dmd 
##     Spock3, Lrp1b, Nmu, Epha5, Mgat4c, Pcdh11x, Fstl4, Kcnip4, Pcdh10, Erbb4 
##     Unc5d, Epha6, Cacna2d3, Dock10, Vgll3, Galntl6, Zfp804a, Gpr149, Calcrl, Ano2 
## PC_ 5 
## Positive:  Ntng1, Klhl1, Trps1, Gna14, Csmd1, Nxph2, Kcnd2, Kif26b, Bmpr1b, Sgcz 
##     Cdh18, Cntn5, Csmd3, Ano5, Pcdh7, Gng2, Galnt18, Tnr, Alk, Kcnip4 
##     Schip1, Dlc1, Spock1, Spock3, Tmeff2, Serpini1, Pakap, Cpne8, Rbfox1, Atp7a 
## Negative:  Sema3e, Kctd16, Pde7b, Lingo2, Fut9, Egfr, Prkg1, Fras1, Grm8, Nfatc1 
##     Mgll, P3h2, Shisa6, Camk1d, L3mbtl4, Tac1, Grm7, Eepd1, Col27a1, Lmo7 
##     Robo1, Hcn1, Stac, Lamc3, Fam189a1, Dock1, Met, Plppr5, Cdh8, Ptprz1
length(setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG,CC_gene)))
## [1] 1126
setdiff(VariableFeatures(object = GEX.seur),
                                                c(#MT_gene,
                                                  DIG,CC_gene))[1:300]
##   [1] "Cntnap2"   "Mgat4c"    "Zfp804a"   "Cntn5"     "Sgcz"      "Robo2"    
##   [7] "Nmu"       "Klhl1"     "Lingo2"    "Cdh8"      "Kcnb2"     "Cpne4"    
##  [13] "Sst"       "Pcdh9"     "Kctd16"    "Grm7"      "Adgrg6"    "Ntng1"    
##  [19] "Csmd1"     "Rbfox1"    "Brinp3"    "Car10"     "Nrxn3"     "Galntl6"  
##  [25] "Sema3e"    "Mmrn1"     "Asic2"     "Dgkg"      "Tmeff2"    "Cdh9"     
##  [31] "Unc5d"     "Ano5"      "Chsy3"     "Gpr149"    "Prkg1"     "Astn2"    
##  [37] "Pcdh11x"   "Kcnq5"     "Ano2"      "Kirrel3"   "Ebf1"      "Schip1"   
##  [43] "Cdh18"     "Gpc6"      "Grik1"     "Lrp1b"     "Ptprt"     "M1ap"     
##  [49] "Trps1"     "L3mbtl4"   "Dgki"      "Gpc5"      "Fut9"      "Tafa1"    
##  [55] "Spock3"    "Xirp2"     "Grm8"      "Opcml"     "Kcnh7"     "Tafa2"    
##  [61] "Plxna4"    "Nxph1"     "Pde7b"     "March1"    "Nxph2"     "Pde4d"    
##  [67] "Nrg1"      "St8sia2"   "Col25a1"   "Itgb6"     "Zfp804b"   "Dcc"      
##  [73] "Pbx3"      "Kctd8"     "Plcl1"     "Avil"      "Prox1"     "Tac1"     
##  [79] "Zfhx3"     "Chrm3"     "Alk"       "Inpp4b"    "Pdzrn4"    "Vip"      
##  [85] "Casp8"     "Kcnmb2"    "Cacna2d3"  "Cpa6"      "Rfx6"      "Dgkb"     
##  [91] "Nog"       "Ntrk3"     "Ccbe1"     "Dlc1"      "Egflam"    "Pcdh10"   
##  [97] "Pde4b"     "Cysltr2"   "Shisa6"    "Lgals9"    "Gna14"     "Cmah"     
## [103] "Wdr17"     "Robo1"     "Dlgap1"    "Dock1"     "Clstn2"    "Cdh6"     
## [109] "Grin3a"    "Kif26b"    "Csmd3"     "Bloc1s6os" "Gimap6"    "Gabra1"   
## [115] "Spag5"     "Ccdc68"    "Eya1"      "Myl1"      "Serpinb8"  "Olfr889"  
## [121] "Adarb2"    "Dock10"    "Unc5c"     "Piezo2"    "Abca9"     "Tenm2"    
## [127] "Erbb4"     "Fhit"      "Bmpr1b"    "Rab27b"    "Vgll3"     "Stac"     
## [133] "Fstl4"     "Mast4"     "Skap1"     "Ssc4d"     "Hcn1"      "Galnt18"  
## [139] "Calcb"     "Cntn4"     "Dab1"      "Ror1"      "Pdyn"      "Tacr1"    
## [145] "Trp53cor1" "Il1rapl1"  "Slc4a4"    "Sema5a"    "Serpini2"  "Cyp2j13"  
## [151] "Smpdl3b"   "Fendrr"    "Col22a1"   "Meltf"     "Kcnh3"     "Kcng2"    
## [157] "Syt10"     "Rtl4"      "Serpini1"  "Met"       "Egfr"      "Kcnd2"    
## [163] "Egfros"    "Efr3a"     "Dach1"     "Nos1"      "Cux2"      "Cadm2"    
## [169] "Tenm4"     "Thsd7a"    "Nell1"     "Gabra3"    "Layn"      "Pcdh7"    
## [175] "Thsd4"     "Penk"      "Htr2c"     "Grp"       "Cftr"      "Platr7"   
## [181] "Pde1a"     "Luzp2"     "Camk1d"    "P3h2"      "Chrna9"    "Greb1"    
## [187] "Mgarp"     "Rbm46"     "Ndufa4l2"  "Nlrp3"     "Mep1b"     "Tnr"      
## [193] "Moxd1"     "Rgs6"      "Bnc2"      "Ptprk"     "Galnt5"    "Clca3a2"  
## [199] "Adamts13"  "Ier5l"     "Bcat1"     "Mir9-3hg"  "Smim31"    "Lrrc18"   
## [205] "Mei1"      "Ptcra"     "Adgre4"    "Pik3ap1"   "Synpr"     "Ahrr"     
## [211] "Acss1"     "Rasgrf1"   "Thsd7b"    "Cntnap5b"  "Trhde"     "Olfm3"    
## [217] "Cdh12"     "Unc13c"    "Flrt2"     "Cntn3"     "Gal"       "Slc35f4"  
## [223] "Adamts9"   "Hmcn1"     "Rarb"      "Adam2"     "Ttc29"     "Ghr"      
## [229] "Stab2"     "P2ry10b"   "Sorcs1"    "Olfr78"    "Hs3st4"    "Grm1"     
## [235] "Myh11"     "Ntm"       "Ephb1"     "Dpp10"     "Satb2"     "Cdkn3"    
## [241] "B3galt1"   "Gng2"      "Kcnn2"     "Pdgfd"     "Gli2"      "Myo3b"    
## [247] "Ube2u"     "Spocd1"    "Cblc"      "Prrt2"     "Maf"       "Acpp"     
## [253] "Efemp1"    "Cfap53"    "Meig1"     "Egfem1"    "Plch1"     "Mtnr1b"   
## [259] "Tcf7l1"    "Syn3"      "Slit3"     "Mgll"      "Ntrk2"     "Tmem132c" 
## [265] "Brinp2"    "Eepd1"     "Galr1"     "Sorcs3"    "Vcan"      "Etl4"     
## [271] "Pbx1"      "Sctr"      "Prune2"    "Prr16"     "Fgf14"     "Grid1"    
## [277] "Fbxl7"     "Fam189a1"  "Rmst"      "Chrna7"    "Arhgap28"  "Reln"     
## [283] "Tmem132d"  "Cpne9"     "Col8a1"    "Crispld1"  "Neto1"     "Ldb2"     
## [289] "Nell1os"   "Sv2c"      "Lrrc7"     "Cdc14a"    "Pde1c"     "Il18r1"   
## [295] "Folh1"     "Clhc1"     "Myh3"      "Macc1"     "Alpk2"     "Slc25a45"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")

DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
  DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)

DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)

decide PCs to use
ElbowPlot(GEX.seur,ndims = 50)

PCs <- 1:24
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2.6)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 11666
## Number of edges: 488790
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7611
## Number of communities: 27
## Elapsed time: 1 seconds

Run UMAP/tSNE

GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 998)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 01:05:09 UMAP embedding parameters a = 0.9922 b = 1.112
## 01:05:09 Read 11666 rows and found 24 numeric columns
## 01:05:09 Using Annoy for neighbor search, n_neighbors = 20
## 01:05:09 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 01:05:11 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\Rtmpio52Tb\file729081c1684
## 01:05:11 Searching Annoy index using 1 thread, search_k = 2000
## 01:05:13 Annoy recall = 100%
## 01:05:13 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 01:05:14 Initializing from normalized Laplacian + noise (using irlba)
## 01:05:14 Commencing optimization for 200 epochs, with 341026 positive edges
## 01:05:26 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)

#saveRDS(GEX.seur,"GEX.pure_reclustering.temp.rds")
FeaturePlot(GEX.seur, features = c(#"Chat"
                                   "Bnc2","Fut9","Nos1","Vip",
                                   "Gal","Moxd1","Sst","Calcb",
                                   "Nmu","Cdh9","Piezo2","Nxph2"),
            pt.size = 0.001,
            ncol = 4)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) + DimPlot(GEX.seur, reduction = "umap", label = T)

Anno

GEX.seur@meta.data[,grep("pANN|snn|sort",colnames(GEX.seur@meta.data),value = T)] <- NULL
head(GEX.seur@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGGTTGGTG-1    CR7d_LI        620          482  0.0000000  0.3225806
## AAACCCACAAATCGTC-1    CR7d_LI        975          674  0.1025641  0.5128205
## AAACCCACAAGGGCAT-1    CR7d_LI        863          627  0.0000000  0.1158749
## AAACCCACACGGCACT-1    CR7d_LI        341          278  0.0000000  0.0000000
## AAACCCACAGCGAGTA-1    CR7d_LI        818          626  1.1002445  0.6112469
## AAACCCACATAGCACT-1    CR7d_LI        952          678  0.0000000  0.2100840
##                    FB.info      S.Score    G2M.Score Phase seurat_clusters cnt
## AAACCCAAGGTTGGTG-1   CKO.2 -0.004383219 -0.008517888    G1               5 CKO
## AAACCCACAAATCGTC-1   CKO.3 -0.010644959 -0.014196479    G1               6 CKO
## AAACCCACAAGGGCAT-1   CKO.3 -0.009392611  0.011330771   G2M               6 CKO
## AAACCCACACGGCACT-1   CKO.4  0.018126937 -0.004542873     S              13 CKO
## AAACCCACAGCGAGTA-1   CTL.1 -0.013775830 -0.014764338    G1              17 CTL
## AAACCCACATAGCACT-1   CTL.1 -0.010018785  0.033450867   G2M              12 CTL
##                    DoubletFinder0.05 DoubletFinder0.1 preAnno
## AAACCCAAGGTTGGTG-1           Singlet          Singlet    IMN1
## AAACCCACAAATCGTC-1           Singlet          Singlet     IN3
## AAACCCACAAGGGCAT-1           Singlet          Singlet     IN3
## AAACCCACACGGCACT-1           Singlet          Singlet     IN3
## AAACCCACAGCGAGTA-1           Singlet          Singlet   IPAN4
## AAACCCACATAGCACT-1           Singlet          Singlet    IMN1
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
                                 levels = c(4,9,7,11, 21,16, 3, 18, 19,    # EMN
                                            2,12,5,1,10, 0,15, 20, 8,22,    # IMN
                                            23, 25, 13,6,       # IN
                                            14, 26, 24, 17   # IPAN
                                            ))
# C22, give to IMN4 or IPAN3 ? IMN4 for high Nos1/Vip level ! 
# but should know it has higher Ntng1/Piezo2 which is closer to IPAN3, well, indeed linked 
VlnPlot(GEX.seur, features = c("Nos1","Vip","Ntng1","Piezo2"),
        ncol = 2, group.by = "sort_clusters")

GEX.seur$Anno <- as.character(GEX.seur$seurat_clusters)

GEX.seur$Anno[GEX.seur$Anno %in% c(4,9,7,11)] <- "EMN1"
GEX.seur$Anno[GEX.seur$Anno %in% c(21,16)] <- "EMN2"
GEX.seur$Anno[GEX.seur$Anno %in% c(3)] <- "EMN3"
GEX.seur$Anno[GEX.seur$Anno %in% c(18)] <- "EMN4"
GEX.seur$Anno[GEX.seur$Anno %in% c(19)] <- "EMN5"

GEX.seur$Anno[GEX.seur$Anno %in% c(2,12,5,1,10)] <- "IMN1"
GEX.seur$Anno[GEX.seur$Anno %in% c(0,15)] <- "IMN2"
GEX.seur$Anno[GEX.seur$Anno %in% c(20)] <- "IMN3"
GEX.seur$Anno[GEX.seur$Anno %in% c(8,22)] <- "IMN4"

GEX.seur$Anno[GEX.seur$Anno %in% c(23)] <- "IN1"
GEX.seur$Anno[GEX.seur$Anno %in% c(25)] <- "IN2"
GEX.seur$Anno[GEX.seur$Anno %in% c(13,6)] <- "IN3"

GEX.seur$Anno[GEX.seur$Anno %in% c(14)] <- "IPAN1"
GEX.seur$Anno[GEX.seur$Anno %in% c(26)] <- "IPAN2"
GEX.seur$Anno[GEX.seur$Anno %in% c(24)] <- "IPAN3"
GEX.seur$Anno[GEX.seur$Anno %in% c(17)] <- "IPAN4"

GEX.seur$Anno <- factor(GEX.seur$Anno)
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "Anno", cols = color.pre) + DimPlot(GEX.seur, reduction = "umap", label = T)

DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "Anno", cols = color.pre) +
  DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)

final markers

markers.new.ss <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
                       "Gfra2","Oprk1","Adamtsl1", 
                       "Fbxw15","Fbxw24","Chrna7",
                       "Satb1","Itga6","Cntnap5b",
                       "Pgm5","Chgb",
                       "Nxph1",
                       "Gabrb1","Glp2r","Nebl",
                       "Lrrc7",
                       "Ryr3","Eda",
                       "Hgf","Lama2","Efnb2",
                       "Tac1",
                       "Kctd8",
                       "Ptn",
                       "Ntrk2","Penk","Itgb8",
                       "Fut9","Nfatc1","Egfr","Pdia5",
                       "Ahr","Mgll",
                       "Aff3",
                       "Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       "Man1a","Airn",
                       "Adcy2",
                       "Col25a1",
                       "Cmah","Creb5","Vip","Pde1a",
                       "Ebf1","Gpc5","Mid1","Igfbp5",
                       "Ppara",
                       "Pcdh11x","Adcy8","Grp"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Nova1",
                      "Cdh10","Kcnk13",
                      "Neurod6","Moxd1","Sctr",
                      "Piezo1","Vipr2","Adamts9","Sst","Kcnn2"
                      ),
                 IPAN=c("Calb2","Adcy1","Calcb","Nmu","Adgrg6","Pcdh10",
                        "Ngfr","Galr1","Il7","Aff2",
                        "Gpr149","Itgb6","Met","Itgbl1",
                        
                        "Cdh6","Cdh8",
                        "Clstn2","Ano2","Ntrk3",
                        "Cpne4","Vwc2l","Cdh9",
                        "Car10","Dcc",
                        "Scgn",
                        "Vcan","Cck","Piezo2","Kcnh7",
                        "Rerg","Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"),
                 check0407 = c("Gcg","Glp2r","Insl5","Nts",
               "Ntsr1","Ntsr2","Pyy",#"Sst",
               "Gip","Sct","Sctr","Ghrl"))
check.markers.ss <- as.vector(unlist(markers.new.ss))
check.markers.ss
##   [1] "Chat"     "Bnc2"     "Tox"      "Ptprt"    "Gfra2"    "Oprk1"   
##   [7] "Adamtsl1" "Fbxw15"   "Fbxw24"   "Chrna7"   "Satb1"    "Itga6"   
##  [13] "Cntnap5b" "Pgm5"     "Chgb"     "Nxph1"    "Gabrb1"   "Glp2r"   
##  [19] "Nebl"     "Lrrc7"    "Ryr3"     "Eda"      "Hgf"      "Lama2"   
##  [25] "Efnb2"    "Tac1"     "Kctd8"    "Ptn"      "Ntrk2"    "Penk"    
##  [31] "Itgb8"    "Fut9"     "Nfatc1"   "Egfr"     "Pdia5"    "Ahr"     
##  [37] "Mgll"     "Aff3"     "Chrm3"    "Nos1"     "Kcnab1"   "Gfra1"   
##  [43] "Etv1"     "Man1a"    "Airn"     "Adcy2"    "Col25a1"  "Cmah"    
##  [49] "Creb5"    "Vip"      "Pde1a"    "Ebf1"     "Gpc5"     "Mid1"    
##  [55] "Igfbp5"   "Ppara"    "Pcdh11x"  "Adcy8"    "Grp"      "Npas3"   
##  [61] "Synpr"    "St18"     "Gal"      "Nova1"    "Cdh10"    "Kcnk13"  
##  [67] "Neurod6"  "Moxd1"    "Sctr"     "Piezo1"   "Vipr2"    "Adamts9" 
##  [73] "Sst"      "Kcnn2"    "Calb2"    "Adcy1"    "Calcb"    "Nmu"     
##  [79] "Adgrg6"   "Pcdh10"   "Ngfr"     "Galr1"    "Il7"      "Aff2"    
##  [85] "Gpr149"   "Itgb6"    "Met"      "Itgbl1"   "Cdh6"     "Cdh8"    
##  [91] "Clstn2"   "Ano2"     "Ntrk3"    "Cpne4"    "Vwc2l"    "Cdh9"    
##  [97] "Car10"    "Dcc"      "Scgn"     "Vcan"     "Cck"      "Piezo2"  
## [103] "Kcnh7"    "Rerg"     "Bmpr1b"   "Skap1"    "Ntng1"    "Tafa2"   
## [109] "Nxph2"    "Gcg"      "Glp2r"    "Insl5"    "Nts"      "Ntsr1"   
## [115] "Ntsr2"    "Pyy"      "Gip"      "Sct"      "Sctr"     "Ghrl"
check.markers.sss <- check.markers.ss[check.markers.ss %in% rownames(GEX.seur)]

pm.All_pre <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "Anno",assay = "RNA",
        cols = c("midnightblue","darkorange1")) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_pre

just directly using final markers used in SI data sets

short_old

## define feature colors
# Cell2020     
material.heat = function(n){
  colorRampPalette(
    c(
      "#283593", # indigo 800
      "#3F51B5", # indigo
      "#2196F3", # blue
      "#00BCD4", # cyan
      "#4CAF50", # green
      "#8BC34A", # light green
      "#CDDC39", # lime
      "#FFEB3B", # yellow
      "#FFC107", # amber
      "#FF9800", # orange
      "#FF5722"  # deep orange
    )
  )(n)
}
markers.old.s <- list(EMN=c("Chat","Bnc2",#"Tox","Ptprt",
                       "Gfra2","Oprk1",#"Adamtsl1", 
                       "Fbxw15","Fbxw24",#"Chrna7",
                       "Satb1","Cntnap5b",
                       "Gabrb1","Nxph1","Lama2","Lrrc7",
                       "Ryr3",#"Eda",
                       "Tac1",
                       #"Kctd8","Ntrk2",
                       "Penk", 
                       "Fut9","Nfatc1","Egfr","Ahr"#,#"Mgll",
                       #"Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       #"Man1a","Airn",
                       "Adcy2","Cmah","Creb5","Vip","Pde1a",
                       "Ebf1"#,"Gpc5"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Neurod6",
                      #"Kcnk13",
                      "Moxd1","Sctr",
                      "Piezo1","Sst",#"Adamts9",
                      "Kcnn2"),
                 IPAN=c("Calb2","Calcb","Nmu","Adgrg6",#"Pcdh10",
                        "Ngfr","Galr1","Il7",#"Aff2",
                        #"Gpr149",
                        "Cdh6","Cdh8",
                        "Clstn2",#"Ano2","Ntrk3",
                        "Cpne4",#"Vwc2l",
                        "Cdh9","Scgn",
                        #"Vcan",
                        "Cck","Piezo2","Kcnh7",
                        #"Rerg",
                        "Bmpr1b","Skap1","Ntng1",
                        "Tafa2","Nxph2"))
pn.Anno.test0a <- DotPlot(GEX.seur, features = as.vector(unlist(markers.old.s)), group.by = "Anno",
        cols = c("midnightblue","darkorange1")) +
  #coord_flip() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) +
  #scale_x_discrete(limits=rev) + 
  scale_y_discrete(limits=rev) #+ 
  #labs(title="")
pn.Anno.test0a

comparing with SI data annotations, here in Colon, EMN4/5 should be SI:EMN5 but larger into two parts
similarly, larger Sst+ IN3 into two parts and directly linked to EMNs
IPAN2 becomes a small tail of IPAN1, this is caused by sampling ? while in Cell2020 newAnno, IPAN2 still has a similar level
some markers also have some different performance, but the main sub-celltypes should be matched ~
pn.Anno.test0b <- DotPlot(GEX.seur, features = as.vector(unlist(markers.old.s)), group.by = "Anno",
        cols = c("midnightblue","darkorange1")) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) #+
  #scale_x_discrete(limits=rev) + scale_y_discrete(limits=rev) + 
  #labs(title="All Anno")
pn.Anno.test0b

short_new

markers.new.s <- list(EMN=c("Chat","Bnc2",#"Tox","Ptprt",
                       "Gfra2","Oprk1",#"Adamtsl1", 
                       "Fbxw15","Fbxw24",#"Chrna7",
                       "Satb1","Itga6","Cntnap5b",
                       "Chgb","Nxph1",
                       "Lama2","Efnb2","Itgb8",
                       "Lrrc7",
                       "Ryr3",#"Eda",
                       "Tac1",
                       #"Kctd8","Ntrk2",
                       "Penk",
                       "Fut9","Nfatc1","Egfr","Ahr"#"Mgll",
                       #"Chrm3"
                       ),
                 IMN=c("Nos1","Kcnab1",
                       "Gfra1","Etv1",
                       #"Man1a","Airn",
                       "Adcy2","Cmah","Col25a1",
                       "Mid1","Creb5","Vip","Pde1a",
                       "Ebf1",#,"Gpc5"
                       "Ppara","Pcdh11x",
                       "Adcy8","Grp"
                       ),
                 IN=c("Npas3","Synpr","St18","Gal",
                      "Cdh10","Neurod6",
                      "Kcnk13",
                      "Moxd1","Sctr",
                      "Piezo1","Vipr2","Sst",#"Adamts9",
                      "Kcnn2"
                      ),
                 IPAN=c("Calb2","Adcy1",
                        "Nmu","Adgrg6",#"Pcdh10",
                        "Ngfr","Il7",
                        "Itgb6","Calcb","Galr1",
                        #"Aff2",
                        #"Gpr149",
                        "Met",
                        "Cpne4","Cdh6","Cdh8",
                        "Clstn2",#"Ano2","Ntrk3",
                        #"Vwc2l",
                        "Car10","Scgn","Glp2r","Cck",
                        "Cdh9",
                        #"Vcan",
                        "Dcc",
                        "Gabrb1",
                        "Piezo2","Kcnh7",
                        #"Rerg",
                        "Bmpr1b","Ntng1","Skap1",
                        "Tafa2","Nxph2"))
pm.Anno.test0a <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "Anno",
        cols = c("midnightblue","darkorange1")) +
  #coord_flip() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) +
  #scale_x_discrete(limits=rev) + 
  scale_y_discrete(limits=rev) #+ 
  #labs(title="All Anno")
pm.Anno.test0a

pm.Anno.test0b <- DotPlot(GEX.seur, features = as.vector(unlist(markers.new.s)), group.by = "Anno",
        cols = c("midnightblue","darkorange1")) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) #+
  #scale_x_discrete(limits=rev) + scale_y_discrete(limits=rev) + 
  #labs(title="All Anno")
pm.Anno.test0b

plot for ref.data

ref1.Cell2020 snSS2 Colon

https://doi.org/10.1016/j.cell.2020.08.003
https://singlecell.broadinstitute.org/single_cell/study/SCP1038

# Cell2020, Anno1: PEMN/PIMN/PIN/PSN/PSVN.. exactly as fig.2a in the paper
color.ref1.1 <- c("#F8766D","#ED8141","#E08B00","#CF9400","#BB9D00",
                  "#5BB300","#00B81F","#00BC59","#00BF7D","#00C19C","#00C0B8","#00BDD0",
                  "#00B0F6","#00A5FF","#7997FF",
                  "#AC88FF","#E770F3","#F763E0","#FF61C9",
                  "#FF65AE","#FF6C91")

# Anno2, similar to local
color.ref1.2 <- c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
                  "#BF7E6B","#D46B35","#F19258","#FF8080",
                  "#BDAE8D","#BD66C4","#C03778",
                  "#97BA59","#DFAB16","#2BA956","#9FE727"#,
                  #"red"
                  )
ref1.seur <- readRDS("/Shared_win/projects/ENS_data/SCP1038_ENS_scRNAseq_Cell2020/final_RAISIN/ss2_neur.newAnno2024.rds")
ref1.seur
## An object of class Seurat 
## 20036 features across 2644 samples within 1 assay 
## Active assay: RNA (20036 features, 1305 variable features)
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(ref1.seur, reduction = "umap", label = T, group.by = "Anno1", cols = color.ref1.1, label.size = 2.65) +
  DimPlot(ref1.seur, reduction = "umap", label = T, group.by = "Anno2", cols = color.ref1.2, label.size = 3.45)

pn.ref1.a <- DotPlot(ref1.seur, features = as.vector(unlist(markers.old.s)), group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  #coord_flip() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) +
  #scale_x_discrete(limits=rev) + 
  scale_y_discrete(limits=rev) + 
  labs(title="ref1.Cell2020 snSS2 Colon neuron")
pn.ref1.a

pn.ref1.b <- DotPlot(ref1.seur, features = as.vector(unlist(markers.old.s)), group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) +
  #scale_x_discrete(limits=rev) + scale_y_discrete(limits=rev) + 
  labs(title="ref1.Cell2020 snSS2 Colon neuron")
pn.ref1.b

pm.ref1.a <- DotPlot(ref1.seur, features = as.vector(unlist(markers.new.s)), group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  #coord_flip() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) +
  #scale_x_discrete(limits=rev) + 
  scale_y_discrete(limits=rev) + 
  labs(title="ref1.Cell2020 snSS2 Colon neuron")
pm.ref1.a

pm.ref1.b <- DotPlot(ref1.seur, features = as.vector(unlist(markers.new.s)), group.by = "Anno2",
        cols = c("midnightblue","darkorange1")) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4,size = 9.15))+ 
  scale_color_gradientn(colours  = material.heat(100)) +
  #scale_x_discrete(limits=rev) + scale_y_discrete(limits=rev) + 
  labs(title="ref1.Cell2020 snSS2 Colon neuron")
pm.ref1.b

ref2.NatNeur2021 sc10x SI

https://www.nature.com/articles/s41593-020-00736-x
https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE149524

# NatNeur2021, Anno1: ENC1/2/3... as in the paper
color.ref2.1 <- c("#8AB6CE","#678BB1","#3975C1","#4CC1BD",
                  "#C03778","#97BA59","#DFAB16","#BF7E6B",
                  "#D46B35","#BDAE8D","#BD66C4","#2BA956",
                  "#FF8080","#FF8080","#FF8080","#FF0000")

# Anno2, similar to local, 
#      without Vip(hi) IMN3/4, 
#      very few may be in the tail of IMN2 or mixed with IPAN3
color.ref2.2 <- c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
                  "#BF7E6B","#D46B35",#"#F19258","#FF8080",
                  "#BDAE8D","#BD66C4","#C03778",
                  "#97BA59","#DFAB16","#2BA956","#9FE727"#,
                  #"red"
                  )
ref2.seur <- readRDS("/Shared_win/projects/20230704_10x_SZJ/analysis_ref/GSE149524.P21.integration_Anno.s.rds")
ref2.seur
## An object of class Seurat 
## 37583 features across 4419 samples within 3 assays 
## Active assay: SCT (16365 features, 0 variable features)
##  2 other assays present: RNA, integrated
##  3 dimensional reductions calculated: pca, tsne, umap
DimPlot(ref2.seur, reduction = "umap", label = T, group.by = "Anno1", cols = color.ref2.1) +
  DimPlot(ref2.seur, reduction = "umap", label = T, group.by = "Anno2", cols = color.ref2.2)

(dotplot already done in SI data integration part)

comparison

check.markers.ref <- c("Chat","Bnc2","Penk","Fut9",
                       "Tac1","Ahr","Nos1","Etv1",
                       "Vip","Gal","Moxd1","Sctr",
                       "Piezo1","Sst","Calb2","Calcb",
                       "Nmu","Cdh8","Cdh9","Cck",
                       "Piezo2","Nxph2")
length(check.markers.ref)
## [1] 22
vln.check <- VlnPlot(GEX.seur, features = check.markers.ref, cols = color.pre,
                    assay = "RNA", group.by = "Anno", ncol = 2, pt.size = 0.01) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.check

vln.check.s <- VlnPlot(GEX.seur, features = check.markers.ref, cols = color.pre,
                    assay = "RNA", group.by = "Anno", ncol = 2, pt.size = 0) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.check.s

vln.ref1 <- VlnPlot(ref1.seur, features = check.markers.ref, cols = color.ref1.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0.01) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref1

vln.ref1.s <- VlnPlot(ref1.seur, features = check.markers.ref, cols = color.ref1.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref1.s

vln.ref2 <- VlnPlot(ref2.seur, features = check.markers.ref, cols = color.ref2.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0.01) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref2

vln.ref2.s <- VlnPlot(ref2.seur, features = check.markers.ref, cols = color.ref2.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref2.s

normalize to neuron markers

# markers used in Cell2020
# details see in 0.test_public/ref1.SCP1038_Cell2020/test202012.final_RAISIN

neur.markers <- c("Tubb3","Elavl4","Ret","Phox2b", 
                  "Chrnb4","Eml5","Smpd3","Tagln3", 
                  "Snap25","Gpr22","Gdap1l1","Stmn3", 
                  "Chrna3","Scg3","Syt4","Ncan", 
                  "Crmp1","Adcyap1r1","Elavl3","Dlg2",
                  "Cacna2d1", "Cacna2d2")



#write.table(check.markers.ref,"./figures.202409/check.markers.txt", row.names = F, col.names = F, quote = F)
#write.table(neur.markers,"./figures.202409/Cell2020.neuron.markers.txt", row.names = F, col.names = F, quote = F)
#
ref1.seur$Ahr.raw <- ref1.seur@assays$RNA@data["Ahr",]
ref1.seur$Ahr.norm <- ( ref1.seur@assays$RNA@data["Ahr",] - colMeans(ref1.seur@assays$RNA@data[neur.markers,]) ) / apply(ref1.seur@assays$RNA@data[neur.markers,],2,sd)

ref1.seur$Sst.raw <- ref1.seur@assays$RNA@data["Sst",]
ref1.seur$Sst.norm <- ( ref1.seur@assays$RNA@data["Sst",] - colMeans(ref1.seur@assays$RNA@data[neur.markers,]) ) / apply(ref1.seur@assays$RNA@data[neur.markers,],2,sd)

#
ref2.seur$Ahr.raw <- ref2.seur@assays$RNA@data["Ahr",]
ref2.seur$Ahr.norm <- ( ref2.seur@assays$RNA@data["Ahr",] - colMeans(ref2.seur@assays$RNA@data[neur.markers,]) ) / apply(ref2.seur@assays$RNA@data[neur.markers,],2,sd)

ref2.seur$Sst.raw <- ref2.seur@assays$RNA@data["Sst",]
ref2.seur$Sst.norm <- ( ref2.seur@assays$RNA@data["Sst",] - colMeans(ref2.seur@assays$RNA@data[neur.markers,]) ) / apply(ref2.seur@assays$RNA@data[neur.markers,],2,sd)
vln.Ahr.ref1 <- VlnPlot(ref1.seur, features = c("Ahr.raw","Ahr.norm"), ncol = 2, group.by = "Anno2", cols = color.ref1.2) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.Ahr.ref1

vln.Sst.ref1 <- VlnPlot(ref1.seur, features = c("Sst.raw","Sst.norm"), ncol = 2, group.by = "Anno2", cols = color.ref1.2) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.Sst.ref1

vln.Ahr.ref2 <- VlnPlot(ref2.seur, features = c("Ahr.raw","Ahr.norm"), ncol = 2, group.by = "Anno2", cols = color.ref2.2) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.Ahr.ref2

vln.Sst.ref2 <- VlnPlot(ref2.seur, features = c("Sst.raw","Sst.norm"), ncol = 2, group.by = "Anno2", cols = color.ref2.2) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.Sst.ref2

ref1.seur$mark <- "ref1"
ref2.seur$mark <- "ref2"
vln.Ahr.ref1a <- VlnPlot(ref1.seur, features = c("Ahr.raw","Ahr.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Ahr.ref1a

vln.Sst.ref1a <- VlnPlot(ref1.seur, features = c("Sst.raw","Sst.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Sst.ref1a

vln.Ahr.ref2a <- VlnPlot(ref2.seur, features = c("Ahr.raw","Ahr.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Ahr.ref2a

vln.Sst.ref2a <- VlnPlot(ref2.seur, features = c("Sst.raw","Sst.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Sst.ref2a

calculate all parallelly
#
for(NN in check.markers.ref){
  
  GEX.seur@meta.data[,paste0(NN,".norm")] <- (GEX.seur@assays$RNA@data[NN,] - colMeans(GEX.seur@assays$RNA@data[neur.markers,])) / apply(GEX.seur@assays$RNA@data[neur.markers,],2,sd)
  
}

# ref1
for(NN in check.markers.ref){
  
  ref1.seur@meta.data[,paste0(NN,".norm")] <- (ref1.seur@assays$RNA@data[NN,] - colMeans(ref1.seur@assays$RNA@data[neur.markers,])) / apply(ref1.seur@assays$RNA@data[neur.markers,],2,sd)
  
}

# ref2
for(NN in check.markers.ref){
  
  ref2.seur@meta.data[,paste0(NN,".norm")] <- (ref2.seur@assays$SCT@data[NN,] - colMeans(ref2.seur@assays$SCT@data[neur.markers,])) / apply(ref2.seur@assays$RNA@data[neur.markers,],2,sd)
  
}
check.markers.refb <- paste0(check.markers.ref,".norm")
check.markers.refb 
##  [1] "Chat.norm"   "Bnc2.norm"   "Penk.norm"   "Fut9.norm"   "Tac1.norm"  
##  [6] "Ahr.norm"    "Nos1.norm"   "Etv1.norm"   "Vip.norm"    "Gal.norm"   
## [11] "Moxd1.norm"  "Sctr.norm"   "Piezo1.norm" "Sst.norm"    "Calb2.norm" 
## [16] "Calcb.norm"  "Nmu.norm"    "Cdh8.norm"   "Cdh9.norm"   "Cck.norm"   
## [21] "Piezo2.norm" "Nxph2.norm"
vln.checkb <- VlnPlot(GEX.seur, features = check.markers.refb, cols = color.pre,
                    assay = "RNA", group.by = "Anno", ncol = 2, pt.size = 0.01) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.checkb

vln.checkb.s <- VlnPlot(GEX.seur, features = check.markers.refb, cols = color.pre,
                    assay = "RNA", group.by = "Anno", ncol = 2, pt.size = 0) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.checkb.s

vln.ref1b <- VlnPlot(ref1.seur, features = check.markers.refb, cols = color.ref1.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0.01) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref1b

vln.ref1b.s <- VlnPlot(ref1.seur, features = check.markers.refb, cols = color.ref1.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref1b.s

vln.ref2b <- VlnPlot(ref2.seur, features = check.markers.refb, cols = color.ref2.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0.01) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref2b

vln.ref2b.s <- VlnPlot(ref2.seur, features = check.markers.refb, cols = color.ref2.2,
                    assay = "RNA", group.by = "Anno2", ncol = 2, pt.size = 0) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="black", alpha=0.55)
vln.ref2b.s

I don’t really think it’s a very proper way to normalize and compare like this
many factors would affect the result bias, including sampling and datatype and datasize
datatype (single cells vs single nuclei) shoud be the biggest issue !
just a test result !!
vln.Tac1.ref1a <- VlnPlot(ref1.seur, features = c("Tac1","Tac1.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Tac1.ref1a

vln.Vip.ref1a <- VlnPlot(ref1.seur, features = c("Vip","Vip.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Vip.ref1a

vln.Gal.ref1a <- VlnPlot(ref1.seur, features = c("Gal","Gal.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Gal.ref1a

vln.Nmu.ref1a <- VlnPlot(ref1.seur, features = c("Nmu","Nmu.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Nmu.ref1a

vln.Tac1.ref2a <- VlnPlot(ref2.seur, features = c("Tac1","Tac1.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Tac1.ref2a

vln.Vip.ref2a <- VlnPlot(ref2.seur, features = c("Vip","Vip.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Vip.ref2a

vln.Gal.ref2a <- VlnPlot(ref2.seur, features = c("Gal","Gal.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Gal.ref2a

vln.Nmu.ref2a <- VlnPlot(ref2.seur, features = c("Nmu","Nmu.norm"), ncol = 2, group.by = "mark", pt.size = 0.05) &
    geom_boxplot(outlier.size = 0, fill="white", width=0.2, size=0.1, alpha=0.55) &
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red", alpha=0.55) & coord_cartesian(ylim = c(-2.5,6))
vln.Nmu.ref2a

cell composition

heatmap

GEX.seur$FB.info <- factor(as.character(GEX.seur$FB.info),
                           levels = c(paste0("CTL.",1:4),
                                      paste0("CKO.",1:4)))

GEX.seur$rep <- paste0("rep",
                       gsub("CTL|CKO","",as.character(GEX.seur$FB.info)))
head(GEX.seur@meta.data)
##                    orig.ident nCount_RNA nFeature_RNA percent.mt percent.rb
## AAACCCAAGGTTGGTG-1    CR7d_LI        620          482  0.0000000  0.3225806
## AAACCCACAAATCGTC-1    CR7d_LI        975          674  0.1025641  0.5128205
## AAACCCACAAGGGCAT-1    CR7d_LI        863          627  0.0000000  0.1158749
## AAACCCACACGGCACT-1    CR7d_LI        341          278  0.0000000  0.0000000
## AAACCCACAGCGAGTA-1    CR7d_LI        818          626  1.1002445  0.6112469
## AAACCCACATAGCACT-1    CR7d_LI        952          678  0.0000000  0.2100840
##                    FB.info      S.Score    G2M.Score Phase seurat_clusters cnt
## AAACCCAAGGTTGGTG-1   CKO.2 -0.004383219 -0.008517888    G1               5 CKO
## AAACCCACAAATCGTC-1   CKO.3 -0.010644959 -0.014196479    G1               6 CKO
## AAACCCACAAGGGCAT-1   CKO.3 -0.009392611  0.011330771   G2M               6 CKO
## AAACCCACACGGCACT-1   CKO.4  0.018126937 -0.004542873     S              13 CKO
## AAACCCACAGCGAGTA-1   CTL.1 -0.013775830 -0.014764338    G1              17 CTL
## AAACCCACATAGCACT-1   CTL.1 -0.010018785  0.033450867   G2M              12 CTL
##                    DoubletFinder0.05 DoubletFinder0.1 preAnno sort_clusters
## AAACCCAAGGTTGGTG-1           Singlet          Singlet    IMN1             5
## AAACCCACAAATCGTC-1           Singlet          Singlet     IN3             6
## AAACCCACAAGGGCAT-1           Singlet          Singlet     IN3             6
## AAACCCACACGGCACT-1           Singlet          Singlet     IN3            13
## AAACCCACAGCGAGTA-1           Singlet          Singlet   IPAN4            17
## AAACCCACATAGCACT-1           Singlet          Singlet    IMN1            12
##                     Anno  Chat.norm  Bnc2.norm  Penk.norm  Fut9.norm  Tac1.norm
## AAACCCAAGGTTGGTG-1  IMN1 -0.6626859 -0.6626859 -0.6626859 -0.6626859 -0.6626859
## AAACCCACAAATCGTC-1   IN3  1.2363122 -0.5865750 -0.5865750 -0.5865750 -0.5865750
## AAACCCACAAGGGCAT-1   IN3 -0.5200643 -0.5200643 -0.5200643 -0.5200643 -0.5200643
## AAACCCACACGGCACT-1   IN3 -0.3075840 -0.3075840 -0.3075840 -0.3075840 -0.3075840
## AAACCCACAGCGAGTA-1 IPAN4 -0.6461557 -0.6461557  1.2075366 -0.6461557 -0.6461557
## AAACCCACATAGCACT-1  IMN1 -0.5929254 -0.5929254 -0.5929254 -0.5929254 -0.5929254
##                      Ahr.norm  Nos1.norm  Etv1.norm   Vip.norm   Gal.norm
## AAACCCAAGGTTGGTG-1 -0.6626859  1.2897465  1.2897465 -0.6626859 -0.6626859
## AAACCCACAAATCGTC-1 -0.5865750 -0.5865750 -0.5865750 -0.5865750 -0.5865750
## AAACCCACAAGGGCAT-1 -0.5200643 -0.5200643 -0.5200643 -0.5200643 -0.5200643
## AAACCCACACGGCACT-1 -0.3075840 -0.3075840 -0.3075840 -0.3075840 -0.3075840
## AAACCCACAGCGAGTA-1 -0.6461557 -0.6461557  1.2075366 -0.6461557 -0.6461557
## AAACCCACATAGCACT-1 -0.5929254  1.9347424  1.4043304 -0.5929254 -0.5929254
##                    Moxd1.norm  Sctr.norm Piezo1.norm   Sst.norm Calb2.norm
## AAACCCAAGGTTGGTG-1 -0.6626859 -0.6626859  -0.6626859 -0.6626859  1.2897465
## AAACCCACAAATCGTC-1 -0.5865750 -0.5865750  -0.5865750 -0.5865750 -0.5865750
## AAACCCACAAGGGCAT-1 -0.5200643 -0.5200643  -0.5200643  1.8034361 -0.5200643
## AAACCCACACGGCACT-1 -0.3075840 -0.3075840  -0.3075840  2.7706653 -0.3075840
## AAACCCACAGCGAGTA-1 -0.6461557 -0.6461557  -0.6461557 -0.6461557 -0.6461557
## AAACCCACATAGCACT-1 -0.5929254 -0.5929254  -0.5929254 -0.5929254 -0.5929254
##                    Calcb.norm   Nmu.norm  Cdh8.norm  Cdh9.norm   Cck.norm
## AAACCCAAGGTTGGTG-1 -0.6626859 -0.6626859 -0.6626859 -0.6626859 -0.6626859
## AAACCCACAAATCGTC-1  1.2363122 -0.5865750 -0.5865750 -0.5865750 -0.5865750
## AAACCCACAAGGGCAT-1 -0.5200643 -0.5200643  1.3273924 -0.5200643 -0.5200643
## AAACCCACACGGCACT-1 -0.3075840 -0.3075840 -0.3075840 -0.3075840 -0.3075840
## AAACCCACAGCGAGTA-1 -0.6461557 -0.6461557 -0.6461557 -0.6461557 -0.6461557
## AAACCCACATAGCACT-1 -0.5929254 -0.5929254 -0.5929254 -0.5929254 -0.5929254
##                    Piezo2.norm Nxph2.norm   rep
## AAACCCAAGGTTGGTG-1  -0.6626859 -0.6626859 rep.2
## AAACCCACAAATCGTC-1  -0.5865750 -0.5865750 rep.3
## AAACCCACAAGGGCAT-1  -0.5200643 -0.5200643 rep.3
## AAACCCACACGGCACT-1  -0.3075840 -0.3075840 rep.4
## AAACCCACAGCGAGTA-1  -0.6461557 -0.6461557 rep.1
## AAACCCACATAGCACT-1  -0.5929254 -0.5929254 rep.1
cowplot::plot_grid(
pheatmap::pheatmap(table(cnt=GEX.seur$FB.info,
      clusters=GEX.seur$Anno)[],
                   main = "Cell Count",
                   gaps_row = c(4),
      gaps_col = c(5,9,12),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,

pheatmap::pheatmap(100*rowRatio(table(cnt=GEX.seur$FB.info,
                                clusters=GEX.seur$Anno)[] ),
                   main = "Cell Ratio",
                   gaps_row = c(4),
      gaps_col = c(5,9,12),
                   cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)

barplot

stat1_Anno <- GEX.seur@meta.data[,c("cnt","rep","Anno")]

stat1_Anno.s <- stat1_Anno %>%
  group_by(cnt,rep,Anno) %>%
  summarise(count=n()) %>%
  mutate(prop= count/sum(count)) %>%
  ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom1.Anno <- stat1_Anno.s %>%
  ggplot(aes(x = Anno, y = 100*prop, color=cnt)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.78, fill="white") +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title="Cell Composition of Colon_CR7d.CTL_CKO, Anno", y = "Proportion") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
  
  geom_point(aes(x=Anno, y = 100*prop, color= cnt),
             position = position_dodge(0.75),
             shape=16,alpha=0.75,size=2.15,
             stroke=0.15, show.legend=F)

pcom1.Anno

sig.test

glm.nb - anova.LRT

Sort.N <- c("CTL","CKO")

glm.nb_anova.1.Anno <- list()

for(x1 in 1:2){
  for(x2 in 1:2){
    if(x2>x1){
      
      stat1_Anno.s_N <- stat1_Anno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
      
      total.N <- stat1_Anno.s_N %>%
        group_by(cnt, rep) %>%
        summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
      
      rownames(total.N) <- paste0(as.character(total.N$cnt),
                                  "_",
                                  as.character(total.N$rep))
      
      stat1_Anno.s_N$total <- total.N[paste0(stat1_Anno.s_N$cnt,
                                            "_",
                                            stat1_Anno.s_N$rep),"total"]
      
      glm.nb_anova.1.Anno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
      
      for(AA in levels(stat1_Anno.s_N$Anno)){
        glm.nb_anova.1.Anno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
          aaa <- stat1_Anno.s_N %>% filter(Anno==AA)
          aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
          aaa.anova <- anova(aaa.glmnb, test = "LRT")
          aaa.anova$'Pr(>Chi)'[2]
        }, error=function(e){
        
          tryCatch({
            aaa <- stat1_Anno.s_N %>% filter(Anno==AA)
            aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
            aaa.anova <- anova(aaa.glmnb, test = "LRT")
            aaa.anova$'Pr(>Chi)'[2]
          }, error=function(e){
            
            tryCatch({
              aaa <- stat1_Anno.s_N %>% filter(Anno==AA)
              aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
              aaa.anova <- anova(aaa.glmnb, test = "LRT")
              aaa.anova$'Pr(>Chi)'[2]
            }, error = function(e){
              NA
            })     
          })
        })
      }
      glm.nb_anova.1.Anno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.1.Anno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]]) 
    }
  }
}
glm.nb_anova.1.Anno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.1.Anno)))
rownames(glm.nb_anova.1.Anno_df) <- names(glm.nb_anova.1.Anno)
colnames(glm.nb_anova.1.Anno_df) <- gsub("X","C",colnames(glm.nb_anova.1.Anno_df))
glm.nb_anova.1.Anno_df
##               EMN1      EMN2      EMN3      EMN4      EMN5      IMN1      IMN2
## CTLvsCKO 0.0378885 0.4043206 0.3528918 0.3419348 0.2361099 0.2643981 0.6000709
##               IMN3      IMN4        IN1       IN2      IN3     IPAN1
## CTLvsCKO 0.1467264 0.6506557 0.05661152 0.2888327 0.451949 0.5173342
##                IPAN2    IPAN3     IPAN4
## CTLvsCKO 0.001861906 0.858837 0.4741643
round(glm.nb_anova.1.Anno_df,4)
##            EMN1   EMN2   EMN3   EMN4   EMN5   IMN1   IMN2   IMN3   IMN4    IN1
## CTLvsCKO 0.0379 0.4043 0.3529 0.3419 0.2361 0.2644 0.6001 0.1467 0.6507 0.0566
##             IN2    IN3  IPAN1  IPAN2  IPAN3  IPAN4
## CTLvsCKO 0.2888 0.4519 0.5173 0.0019 0.8588 0.4742
#saveRDS(GEX.seur,"./Colon.CR7d.Ahr_CKO.rds")

markers

(pass)

DEGs

#
Idents(GEX.seur) <- "cnt"
neur.clusters <- grep("EMN|IMN|IN|IPAN",levels(GEX.seur$Anno), value = T)
neur.clusters
##  [1] "EMN1"  "EMN2"  "EMN3"  "EMN4"  "EMN5"  "IMN1"  "IMN2"  "IMN3"  "IMN4" 
## [10] "IN1"   "IN2"   "IN3"   "IPAN1" "IPAN2" "IPAN3" "IPAN4"
#
all_new <- neur.clusters
all_new
##  [1] "EMN1"  "EMN2"  "EMN3"  "EMN4"  "EMN5"  "IMN1"  "IMN2"  "IMN3"  "IMN4" 
## [10] "IN1"   "IN2"   "IN3"   "IPAN1" "IPAN2" "IPAN3" "IPAN4"
#
list_new <- list()
list_new[["All"]] <- all_new

list_new[["EMNs"]] <- grep("EMN",all_new,value = T)
list_new[["IMNs"]] <- grep("IMN",all_new,value = T)


for(NN in all_new){
  list_new[[NN]] <- NN
}

names_new <- names(list_new)
list_new
## $All
##  [1] "EMN1"  "EMN2"  "EMN3"  "EMN4"  "EMN5"  "IMN1"  "IMN2"  "IMN3"  "IMN4" 
## [10] "IN1"   "IN2"   "IN3"   "IPAN1" "IPAN2" "IPAN3" "IPAN4"
## 
## $EMNs
## [1] "EMN1" "EMN2" "EMN3" "EMN4" "EMN5"
## 
## $IMNs
## [1] "IMN1" "IMN2" "IMN3" "IMN4"
## 
## $EMN1
## [1] "EMN1"
## 
## $EMN2
## [1] "EMN2"
## 
## $EMN3
## [1] "EMN3"
## 
## $EMN4
## [1] "EMN4"
## 
## $EMN5
## [1] "EMN5"
## 
## $IMN1
## [1] "IMN1"
## 
## $IMN2
## [1] "IMN2"
## 
## $IMN3
## [1] "IMN3"
## 
## $IMN4
## [1] "IMN4"
## 
## $IN1
## [1] "IN1"
## 
## $IN2
## [1] "IN2"
## 
## $IN3
## [1] "IN3"
## 
## $IPAN1
## [1] "IPAN1"
## 
## $IPAN2
## [1] "IPAN2"
## 
## $IPAN3
## [1] "IPAN3"
## 
## $IPAN4
## [1] "IPAN4"
names_new
##  [1] "All"   "EMNs"  "IMNs"  "EMN1"  "EMN2"  "EMN3"  "EMN4"  "EMN5"  "IMN1" 
## [10] "IMN2"  "IMN3"  "IMN4"  "IN1"   "IN2"   "IN3"   "IPAN1" "IPAN2" "IPAN3"
## [19] "IPAN4"

run

#test.DEGs_new
# save DEGs' table
df_test.DEGs_new <- data.frame()
for(nn in names_new){

  df_test.DEGs_new <- rbind(df_test.DEGs_new, data.frame(test.DEGs_new[[nn]],Anno=nn))
  
  
}

#write.csv(df_test.DEGs_new, "Colon_CR7d.DEGs_CTLvsCKO_Anno.csv")

stat

cut0: padj 0.05 log2FC 0.1
cut1: padj 0.05 log2FC 0.3
cut2: padj 0.01 log2FC > log2(1.5) (|FC|>1.5)

cut0

# cut0
cut.padj = 0.05
cut.log2FC = 0.1  
  
cut.pct1 = 0.1
stat_test0.DEGs_new <- df_test.DEGs_new %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(Anno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
stat_test0.DEGs_new
##     Anno cluster up.DEGs
## 1    All     CTL       8
## 2    All     CKO       7
## 3   EMN1     CTL       2
## 4   EMN2     CTL       1
## 5   EMN3     CTL       1
## 6   EMN4     CTL       1
## 7   EMN5     CKO       1
## 8   EMNs     CTL       2
## 9   EMNs     CKO       2
## 10  IMN1     CTL       1
## 11  IMN1     CKO       4
## 12  IMN2     CTL       1
## 13  IMN4     CTL       1
## 14  IMNs     CTL       2
## 15  IMNs     CKO       3
## 16   IN3     CTL       1
## 17 IPAN1     CTL       4
## 18 IPAN1     CKO       2
df_test.DEGs_new %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(Anno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=Anno, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))

cut1

# cut1
cut.padj = 0.05
cut.log2FC = 0.3  
  
cut.pct1 = 0.1
stat_test1.DEGs_new <- df_test.DEGs_new %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(Anno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
stat_test1.DEGs_new
##     Anno cluster up.DEGs
## 1    All     CTL       4
## 2    All     CKO       1
## 3   EMN1     CTL       1
## 4   EMN2     CTL       1
## 5   EMN3     CTL       1
## 6   EMN4     CTL       1
## 7   EMN5     CKO       1
## 8   EMNs     CTL       1
## 9   EMNs     CKO       1
## 10  IMN1     CTL       1
## 11  IMN1     CKO       1
## 12  IMN2     CTL       1
## 13  IMN4     CTL       1
## 14  IMNs     CTL       2
## 15   IN3     CTL       1
## 16 IPAN1     CTL       4
## 17 IPAN1     CKO       2
df_test.DEGs_new %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(Anno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=Anno, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |log2FC|>",cut.log2FC), y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))

cut2

# cut2
cut.padj = 0.01
cut.log2FC = log2(1.5)  
  
cut.pct1 = 0.1
stat_test2.DEGs_new <- df_test.DEGs_new %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(Anno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame()
stat_test2.DEGs_new
##     Anno cluster up.DEGs
## 1    All     CTL       1
## 2   EMN3     CTL       1
## 3   EMNs     CTL       1
## 4   IMN1     CTL       1
## 5   IMN2     CTL       1
## 6   IMN4     CTL       1
## 7   IMNs     CTL       2
## 8    IN3     CTL       1
## 9  IPAN1     CTL       3
## 10 IPAN1     CKO       1
df_test.DEGs_new %>% filter(p_val_adj < cut.padj & 
                       abs(avg_log2FC) > cut.log2FC & 
                       pct.1 > cut.pct1) %>%
  group_by(Anno,cluster) %>%
  summarise(up.DEGs = n()) %>% as.data.frame() %>%
  ggplot(aes(x=Anno, y=up.DEGs, color = cluster)) +
  geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
  geom_text(aes(label = up.DEGs),vjust=-0.21, show.legend = F, position = position_dodge(0.75) ) +
  theme_classic(base_size = 15) +
  scale_color_manual(values = color.cnt, name="") +
  labs(title=paste0("up.DEGs stat, pct.1>",cut.pct1,", padj<",cut.padj,", |log2FC|>","log2(1.5)"), y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6),
        title =element_text(size=12, face='bold'))

plot

pp_test.DEGs.new <- lapply(list_new, function(x){NA})
#
test.DEGs_new.combine <- test.DEGs_new
test.DEGs_new.combine <- lapply(test.DEGs_new.combine, function(x){
  x[x$cluster=="CTL","avg_log2FC"] <- -x[x$cluster=="CTL","avg_log2FC"]
  x
})

All

pp_test.DEGs.new$All <- test.DEGs_new.combine$All %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="All CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$All

EMNs

pp_test.DEGs.new$EMNs <- test.DEGs_new.combine$EMNs %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="EMNs CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$EMNs

IMNs

pp_test.DEGs.new$IMNs <- test.DEGs_new.combine$IMNs %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IMNs CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IMNs

EMN1

pp_test.DEGs.new$EMN1 <- test.DEGs_new.combine$EMN1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="EMN1 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$EMN1

EMN2

pp_test.DEGs.new$EMN2 <- test.DEGs_new.combine$EMN2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="EMN2 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$EMN2

EMN3

pp_test.DEGs.new$EMN3 <- test.DEGs_new.combine$EMN3 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="EMN3 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$EMN3

EMN4

pp_test.DEGs.new$EMN4 <- test.DEGs_new.combine$EMN4 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="EMN4 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$EMN4

EMN5

pp_test.DEGs.new$EMN5 <- test.DEGs_new.combine$EMN5 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="EMN5 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$EMN5

IMN1

pp_test.DEGs.new$IMN1 <- test.DEGs_new.combine$IMN1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IMN1 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IMN1

IMN2

pp_test.DEGs.new$IMN2 <- test.DEGs_new.combine$IMN2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IMN2 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IMN2

IMN3

pp_test.DEGs.new$IMN3 <- test.DEGs_new.combine$IMN3 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IMN3 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IMN3

IMN4

pp_test.DEGs.new$IMN4 <- test.DEGs_new.combine$IMN4 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IMN4 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IMN4

IN1

pp_test.DEGs.new$IN1 <- test.DEGs_new.combine$IN1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IN1 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IN1

IN2

pp_test.DEGs.new$IN2 <- test.DEGs_new.combine$IN2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IN2 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IN2

IN3

pp_test.DEGs.new$IN3 <- test.DEGs_new.combine$IN3 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IN3 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IN3

IPAN1

pp_test.DEGs.new$IPAN1 <- test.DEGs_new.combine$IPAN1 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IPAN1 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IPAN1

IPAN2

pp_test.DEGs.new$IPAN2 <- test.DEGs_new.combine$IPAN2 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IPAN2 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IPAN2

IPAN3

pp_test.DEGs.new$IPAN3 <- test.DEGs_new.combine$IPAN3 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IPAN3 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IPAN3

IPAN4

pp_test.DEGs.new$IPAN4 <- test.DEGs_new.combine$IPAN4 %>%
  mutate(label=ifelse(((p_val_adj < 1e-3 & avg_log2FC<0) | (p_val_adj < 1e-3 & avg_log2FC>0) | (p_val_adj < 0.05 & abs(avg_log2FC) > 0.1)) &
                        #!grepl("^Gm|^Hsp|^Rps|^Rpl|Rik$|Xist|Tsix|Uty|Ddx3y|Eif2s3y|Kdm5d",gene),gene,""),
                        !grepl("^Rps|^Rpl",gene),gene,""),
         sig.=ifelse(p_val_adj<0.05 & avg_log2FC<0,"CTL",ifelse(p_val_adj<0.05 & avg_log2FC>0,"CKO","None")),
         padj=ifelse(p_val_adj<1e-60,p_val_adj+1e-60,p_val_adj)) %>%
  ggplot(aes(y= -log10(padj), x=avg_log2FC, size=pct.1, label=label,fill=sig.)) +
  geom_point(shape=21, alpha=0.65) +
  geom_text_repel(size=2.5, max.overlaps = 500) +
  scale_fill_manual(values = c("CTL"=as.vector(color.cnt[1]),
                               "CKO"=as.vector(color.cnt[2]),
                               "None"="grey")) +
  theme_classic() + labs(title="IPAN4 CTL vs CKO") +
  guides(size = guide_legend(order = 2), fill=guide_legend(order = 1, reverse = F)) +
  geom_vline(xintercept = c(-0.1,0.1), linetype="dotted")
pp_test.DEGs.new$IPAN4

end